Visualize the Flora Danica Cleaned Metadata

Contents

Visualize the Flora Danica Cleaned Metadata

Before you start

Before you run this script on your own computer, please note that it uses the following libraries, which are not part of the Python standard library. You can install them from within your notebook using pip install: create a new cell and run the following:

!pip install matplotlib plotly-express

# Import libraries
import pandas as pd
import plotly.express as px
# Load the cleaned dataset.
# Forward slashes keep this relative path valid on Windows, macOS and Linux alike;
# a backslash-separated path only resolves on Windows.
subset_df = pd.read_csv('./mekuni_flora_danica_data/flora_danica_tidy_format.csv')
# Count how often each value of 'author_st' occurs and plot the totals as a bar chart.
# The two columns are named explicitly ('author_st', 'count') so the frame has the
# same layout no matter how the installed pandas version labels value_counts() output.
author_data_in = (
    subset_df['author_st']
    .value_counts()
    .rename_axis('author_st')
    .reset_index(name='count')
)

fig = px.bar(
    author_data_in,
    x='author_st',
    y='count',
    color_discrete_sequence=["#13dbb7"],
    title='Authors and Number of Published Plates',
)

# Update layout for better readability
fig.update_layout(
    xaxis_title='Authors',
    yaxis_title='Count',
    xaxis_tickangle=-45,  # Rotate x-axis labels for better readability
    yaxis=dict(showgrid=True, gridcolor="#45d6d3", gridwidth=0.5),  # Add gridlines
    plot_bgcolor='white',  # Set the background color to white
)

fig.show()
# Count how often each value of 'taxonomic_group_st' occurs.
# The columns are named explicitly ('taxonomic_group_st', 'count') so that the
# query and the chart below do not depend on how the installed pandas version
# labels value_counts() output.
taxonomy_data_in = (
    subset_df['taxonomic_group_st']
    .value_counts()
    .rename_axis('taxonomic_group_st')
    .reset_index(name='count')
)
# Keep only the groups that occur more than 15 times
taxonomy_data_in = taxonomy_data_in.query('count > 15')

# Use Plotly to create a chart
fig = px.bar(
    taxonomy_data_in,
    x='taxonomic_group_st',
    y='count',
    color_discrete_sequence=["#ea7600"],
    title='Plant Groups',
)

# Customize layout to improve readability
fig.update_layout(
    xaxis_title='Taxonomic Group',
    yaxis_title='Count',
    xaxis_tickangle=-45,  # Rotate x-axis labels
    yaxis=dict(showgrid=True, gridcolor="#45d6d3", gridwidth=0.5),  # Add gridlines
    plot_bgcolor='white',  # Set background color to white
)

fig.show()
print('Prepare data for visualization; group data by "Latin family name" and count values in "Latin name"')

# Count the occurrences of each 'latin_name' within each 'latin_family_name'.
# to_frame('count') names the count column explicitly, so the query and the
# treemap below do not depend on how the installed pandas version labels
# value_counts() output (and the later reset_index() cannot collide with the
# 'latin_name' index level).
count_df = (
    subset_df.groupby('latin_family_name')['latin_name']
    .value_counts()
    .to_frame('count')
    .query('count >= 1')
    .reset_index()
)

# Build an interactive tree diagram with Plotly
fig = px.treemap(
    count_df,
    path=['latin_family_name', 'latin_name'],
    values='count',
    color_discrete_sequence=["#45d6d3", "#2224ba", "#ee138c", "#ea7600", "#13dbb7"],
    title='Tree Visualization of Plant Families and Species',
)

# Show the figure
fig.show()
Prepare data for visualization; group data by "Latin family name" and count values in "Latin name"
print('Count the values in the column "Latin family name".')

# The columns are named explicitly ('latin_family_name', 'count') so the chart
# below does not depend on how the installed pandas version labels
# value_counts() output.
family_names_value_count = (
    subset_df['latin_family_name']
    .value_counts()
    .rename_axis('latin_family_name')
    .reset_index(name='count')
)

print('Select the first 25 rows for visualization.')
# value_counts() sorts by descending count by default, so these are the 25 most frequent families
plot_df = family_names_value_count.head(25).copy()

# Create an interactive bar chart with Plotly
fig = px.bar(
    plot_df,
    x='latin_family_name',
    y='count',
    title='Different Plant Families',
    labels={'latin_family_name': 'Latin Family Name', 'count': 'Count'},
    color_discrete_sequence=['#ee138c'],
)

# Customize layout for better readability
fig.update_layout(
    xaxis_title='Latin Family Name',
    yaxis_title='Count',
    xaxis_tickangle=-45,  # Rotate x-axis labels
    yaxis=dict(showgrid=True, gridcolor="#45d6d3", gridwidth=0.5),  # Add gridlines
    plot_bgcolor='white',  # Background color becomes white
)

# Show the chart
fig.show()
Count the values in the column "Latin family name".
Select the first 25 rows for visualization.